#delimit ;
/* Session housekeeping: commands below are terminated by a semicolon.       */
/* Close any log left open by an earlier run (no error if none is open),     */
/* wipe data and programs from memory, and switch off output paging.         */
capture log close ;
clear all ;
set more off ;

/* MASTER FILE FOR EPA WQ GRANT FINAL SP SURVEY DATA PROCESSING AND ANALYSIS */
/* AUTHOR: ROGER H. VON HAEFEN  */
/* LAST EDITED: OCTOBER 1, 2021 */


/* Open a plain-text log at the path held in the global macro impute_log,    */
/* overwriting any existing file, then suspend logging until it is resumed   */
/* with "log on" further down.                                               */
/* NOTE(review): the global is not defined in this file - presumably set by  */
/* the calling script. Confirm before running standalone.                    */
log using $impute_log, text replace ;
log off ;

/* IMPUTING MISSING VALUES WITH CELL MEANS SEPARATELY BY COUNTY */

/* IN THE PROCESS, CREATING SEVERAL NEW DEMOGRAPHIC VARIABLES */

/* FINAL DEMOGRAPHIC VARIABLES:  */
/* fulltime			*/
/* retired			*/
/* own_home			*/
/* water_bill 			*/
/* hs_diploma 			*/
/* college 			*/
/* adults			*/
/* kids 			*/
/* native 			*/
/* asian 			*/
/* black 			*/
/* white 			*/
/* hawaii 			*/
/* other 			*/
/* hisp_latino			*/

/* Load the survey file named by the global macro survey_data.  "clear" is   */
/* the documented option of -use- for replacing whatever is in memory.       */
use $survey_data, clear ;

/* Each indicator below is 1/0 when the source item holds a valid answer and */
/* missing when the item is coded -1 or is system missing.  The "if"         */
/* qualifier on -gen- leaves excluded observations missing.                  */
gen fulltime = (employ_status == 1) if !inlist(employ_status,-1,.) ;
label var fulltime "= 1 if employed fulltime" ;

gen retired = (employ_status == 4) if !inlist(employ_status,-1,.) ;
label var retired "= 1 if retired" ;

gen own_home = (own_rent == 1) if !inlist(own_rent,-1,.) ;
label var own_home "= 1 if respondent owns home" ;

/* Recode water_bill in place: code 3 becomes missing and code 2 becomes 0,  */
/* so the variable matches the 1/0 meaning given in its label.               */
replace water_bill = . if water_bill == 3 ;
replace water_bill = 0 if water_bill == 2 ;
label var water_bill "= 1 if respondent directly pays water bill" ;

/* Education thresholds: educ of 2 or more counts as a high school diploma,  */
/* educ of 5 or more as a 4-year college degree (per the labels below).      */
gen hs_diploma = (educ >= 2) if !inlist(educ,-1,.) ;
label var hs_diploma "= 1 if respondent has a high school diploma" ;

gen college = (educ >= 5) if !inlist(educ,-1,.) ;
label var college "= 1 if respondent has a 4-year college degree" ;

/* Store system-missing educ as -1 so the raw item carries a single          */
/* missing code.  This must come after the two indicators above.             */
replace educ = -1 if educ == . ;

capture program drop imp1 ;
/* imp1 varname                                                              */
/*                                                                           */
/* Cell-mean imputation by county.  Values of -1 in the named variable are   */
/* first set to missing.  Then, separately for counties "G", "M" and "W",    */
/* every system-missing value is replaced by the mean of the non-missing     */
/* values in that county.  A small table is displayed with the percentage    */
/* of observations that were missing and the county mean that was used.      */
/*                                                                           */
/* Requires the variables resp_id and county (string) in memory.  The        */
/* denominator of the missing percentage is the count of non-missing         */
/* resp_id in the county.                                                    */
program imp1 ;
      args v ;
      local codes G M W ;
      local names Guilford Mecklenburg Wake ;

      qui replace `v' = . if `v' == -1 ;

      di ;
      di "VARIABLE: `v'" ;
      di "County      % Missing       Mean" ;
      di "--------------------------------" ;
      forvalues i = 1/3 { ;
            local c  : word `i' of `codes' ;
            local nm : word `i' of `names' ;

            /* Number of respondents in the county. */
            qui sum resp_id if county == "`c'" ;
            local n_all = r(N) ;

            /* Capture the mean and non-missing count BEFORE replacing, so   */
            /* the report never depends on what a later command does to the  */
            /* stored results.  All three counties now follow the same order. */
            qui sum `v' if county == "`c'" ;
            local mu    = r(mean) ;
            local n_obs = r(N) ;
            qui replace `v' = r(mean) if `v' == . & county == "`c'" ;

            /* County name padded to 12 columns, as in the table header. */
            di "`nm'" _col(13) (100*(1-`n_obs'/`n_all')) "    " (`mu') ;
      } ;
end ;

/* Resume logging so the imputation tables printed by imp1 are recorded. */
log on ;

/* Variables to impute, in processing order: household and respondent      */
/* characteristics first, then the race and ethnicity indicators.          */
local demog_vars gender age fulltime retired own_home water_bill hs_diploma college adults kids ;
local race_vars  native asian black white hawaii other hisp_latino ;

foreach y of varlist `demog_vars' `race_vars' { ;
   imp1 `y' ;
} ;

/* Summary of income before any imputation (still contains the -1 code). */
sum income ;

/* Indicator for a usable income report.  Both the -1 code and system       */
/* missing count as not reported, matching how the other survey items in    */
/* this file are treated.                                                   */
gen inc_reported = !inlist(income,-1,.) ;
label var inc_reported "=1 if R reported HH income" ;
gen w_county = (county == "W") ;
gen g_county = (county == "G") ;
gen m_county = (county == "M") ;
gen age2 = age^2 ;

/* ASCERTAINING WHAT INCOME NON-RESPONSE IS CORRELATED WITH */

/* NOTE(review): all three county dummies enter together with the constant, */
/* so they are perfectly collinear.  Stata is expected to omit one of them  */
/* automatically - confirm which county ends up as the base category.       */
probit inc_reported gender age age2 fulltime retired own_home water_bill hs_diploma college adults kids native asian black white hawaii other hisp_latino w_county g_county m_county, robust ;


/* IMPUTING MISSING INCOMES BY COUNTY */

/* For each county in turn (W, G, M): fit a Poisson regression of income on */
/* the covariates using that county's reported incomes, then fill that      */
/* county's missing incomes with the default -predict- value from the fit.  */
replace income = . if income == -1 ;
local inc_x gender age age2 fulltime retired own_home hs_diploma college adults kids black hisp_latino ;
foreach c in W G M { ;
   poisson income `inc_x' if county == "`c'", robust ;
   predict inc1 ;
   replace income = inc1 if income == . & county == "`c'" ;
   drop inc1 ;
} ;

/* Remove the helper variables that were only needed for the regressions. */
drop w_county g_county m_county age2 inc_reported ;

sum income gender age fulltime retired own_home hs_diploma college adults kids asian black white ;
log close ;

/* Respondent-level variables that are summarised and kept together in the */
/* final column order.                                                     */
local core_vars gender age income employ_status fulltime retired own_rent own_home water_bill
educ hs_diploma college adults kids resident native asian black white hawaii other hisp_latino ;

/* Put the survey administration variables first, followed by the core list. */
order resp_id code survey_duration survey_date access_mode block_id block_f county `core_vars' ;

/* Summary statistics for the full sample, then separately by county. */
sum `core_vars' ;
bysort county : sum `core_vars' ;

/* Restore respondent order, shrink storage types, and write the imputed    */
/* data set to the path held in the global macro imputed_data.              */
sort resp_id ;
compress ;
save $imputed_data, replace ;

